require(plm)
require(runjags)
require(coda)

#### data preparation ####
DPJ.data.Study2 <- read.csv("DPJ_data_Study2.csv")

# exclude periods with no speech
# PS = plenary session, BC = Budget Committee, OC = other committees
DPJ.personnel.period.date.Study2 <- as.Date(c("2003-11-19", "2003-12-11", "2004-05-18", 
                                              "2004-09-13", "2005-09-17", "2006-04-07", 
                                              "2006-09-25", "2007-08-31", "2008-09-21", 
                                              "2009-05-16", "2009-09-16", "2010-06-08", 
                                              "2010-09-17", "2011-01-14", "2011-09-02", 
                                              "2012-01-13", "2012-06-04", "2012-10-01", 
                                              "2012-12-26", "2013-09-04", "2014-09-16", 
                                              "2015-01-18", "2015-10-13", "2016-03-27"))

DPJ.PS.data.Study2 <- DPJ.BC.data.Study2 <- DPJ.OC.data.Study2 <- DPJ.data.Study2
for (i in 1:(length(DPJ.personnel.period.date.Study2) - 1)) {
  temp.data <- subset(DPJ.data.Study2, time == i)
  if (sum(temp.data$PS.char > 0) == 0) {
    DPJ.PS.data.Study2 <- subset(DPJ.PS.data.Study2, time != i)
  }
  if (sum(temp.data$BC.char > 0) == 0) {
    DPJ.BC.data.Study2 <- subset(DPJ.BC.data.Study2, time != i)
  }
  if (sum(temp.data$OC.char > 0) == 0) {
    DPJ.OC.data.Study2 <- subset(DPJ.OC.data.Study2, time != i)
  }
}

# discard MPs whose ideal points are missing
DPJ.PS.data.Study2 <- subset(DPJ.PS.data.Study2, ! is.na(distance.1))
DPJ.BC.data.Study2 <- subset(DPJ.BC.data.Study2, ! is.na(distance.1))
DPJ.OC.data.Study2 <- subset(DPJ.OC.data.Study2, ! is.na(distance.1))

# standardize ideological distance variables
DPJ.PS.data.Study2$distance.1 <- scale(DPJ.PS.data.Study2$distance.1)
DPJ.PS.data.Study2$distance.2 <- scale(DPJ.PS.data.Study2$distance.2)
DPJ.BC.data.Study2$distance.1 <- scale(DPJ.BC.data.Study2$distance.1)
DPJ.BC.data.Study2$distance.2 <- scale(DPJ.BC.data.Study2$distance.2)
DPJ.OC.data.Study2$distance.1 <- scale(DPJ.OC.data.Study2$distance.1)
DPJ.OC.data.Study2$distance.2 <- scale(DPJ.OC.data.Study2$distance.2)
DPJ.PS.data.Study2$distance.1.w.P <- scale(DPJ.PS.data.Study2$distance.1.w.P)
DPJ.PS.data.Study2$distance.2.w.P <- scale(DPJ.PS.data.Study2$distance.2.w.P)
DPJ.BC.data.Study2$distance.1.w.P <- scale(DPJ.BC.data.Study2$distance.1.w.P)
DPJ.BC.data.Study2$distance.2.w.P <- scale(DPJ.BC.data.Study2$distance.2.w.P)
DPJ.OC.data.Study2$distance.1.w.P <- scale(DPJ.OC.data.Study2$distance.1.w.P)
DPJ.OC.data.Study2$distance.2.w.P <- scale(DPJ.OC.data.Study2$distance.2.w.P)

# convert data frames to the pdata.frame class of the plm package
DPJ.PS.pdata.Study2 <- pdata.frame(DPJ.PS.data.Study2, c("name_jp", "time"))
DPJ.BC.pdata.Study2 <- pdata.frame(DPJ.BC.data.Study2, c("name_jp", "time"))
DPJ.OC.pdata.Study2 <- pdata.frame(DPJ.OC.data.Study2, c("name_jp", "time"))

#### main analyses (Online Appendix E.2) ####
Study2.DPJ.PS <- plm(PS.char ~ distance.1 + distance.2 + 
                       top.leader + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = DPJ.PS.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.DPJ.PS, 
              vcov = vcovHC(Study2.DPJ.PS, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)
nrow(DPJ.PS.pdata.Study2)  # number of observations
length(unique(DPJ.PS.pdata.Study2$name_jp))  # number of MPs
length(unique(DPJ.PS.pdata.Study2$time))  # number of periods

round(exp(Study2.DPJ.PS$coefficients[2]), 3)  # substantive interpretation

#### analysis of committees (Online Appendix E.3) ####
## Budget Committee
Study2.DPJ.BC <- plm(BC.char ~ distance.1 + distance.2 + 
                       top.leader + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = DPJ.BC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.DPJ.BC, 
              vcov = vcovHC(Study2.DPJ.BC, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)
nrow(DPJ.BC.pdata.Study2)  # number of observations
length(unique(DPJ.BC.pdata.Study2$name_jp))  # number of MPs
length(unique(DPJ.BC.pdata.Study2$time))  # number of periods

## other committees
Study2.DPJ.OC <- plm(OC.char ~ distance.1 + distance.2 + 
                       top.leader + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = DPJ.OC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.DPJ.OC, 
              vcov = vcovHC(Study2.DPJ.OC, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)
nrow(DPJ.OC.pdata.Study2)  # number of observations
length(unique(DPJ.OC.pdata.Study2$name_jp))  # number of MPs
length(unique(DPJ.OC.pdata.Study2$time))  # number of periods

#### robustness check: other dependent variables (Online Appendix E.4) ####
## number of speeches
Study2.DPJ.PS.count <- plm(PS.count ~ distance.1 + distance.2 + 
                             top.leader + chair + minister + jr.minister + 
                             female + age + dynastic + log.term + 
                             vote.share * SMD + zombie + 
                             E.44 + E.45 + E.46 + E.47, 
                           data = DPJ.PS.pdata.Study2, 
                           model = "pooling")
round(summary(Study2.DPJ.PS.count, 
              vcov = vcovHC(Study2.DPJ.PS.count, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.DPJ.BC.count <- plm(BC.count ~ distance.1 + distance.2 + 
                             top.leader + chair + minister + jr.minister + 
                             female + age + dynastic + log.term + 
                             vote.share * SMD + zombie + 
                             E.44 + E.45 + E.46 + E.47, 
                           data = DPJ.BC.pdata.Study2, 
                           model = "pooling")
round(summary(Study2.DPJ.BC.count, 
              vcov = vcovHC(Study2.DPJ.BC.count, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.DPJ.OC.count <- plm(OC.count ~ distance.1 + distance.2 + 
                             top.leader + chair + minister + jr.minister + 
                             female + age + dynastic + log.term + 
                             vote.share * SMD + zombie + 
                             E.44 + E.45 + E.46 + E.47, 
                           data = DPJ.OC.pdata.Study2, 
                           model = "pooling")
round(summary(Study2.DPJ.OC.count, 
              vcov = vcovHC(Study2.DPJ.OC.count, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

## making one or more speeches
Study2.DPJ.PS.bin <- plm(PS.bin ~ distance.1 + distance.2 + 
                           top.leader + chair + minister + jr.minister + 
                           female + age + dynastic + log.term + 
                           vote.share * SMD + zombie + 
                           E.44 + E.45 + E.46 + E.47, 
                         data = DPJ.PS.pdata.Study2, 
                         model = "pooling")
round(summary(Study2.DPJ.PS.bin, 
              vcov = vcovHC(Study2.DPJ.PS.bin, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.DPJ.BC.bin <- plm(BC.bin ~ distance.1 + distance.2 + 
                           top.leader + chair + minister + jr.minister + 
                           female + age + dynastic + log.term + 
                           vote.share * SMD + zombie + 
                           E.44 + E.45 + E.46 + E.47, 
                         data = DPJ.BC.pdata.Study2, 
                         model = "pooling")
round(summary(Study2.DPJ.BC.bin, 
              vcov = vcovHC(Study2.DPJ.BC.bin, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.DPJ.OC.bin <- plm(OC.bin ~ distance.1 + distance.2 + 
                           top.leader + chair + minister + jr.minister + 
                           female + age + dynastic + log.term + 
                           vote.share * SMD + zombie + 
                           E.44 + E.45 + E.46 + E.47, 
                         data = DPJ.OC.pdata.Study2, 
                         model = "pooling")
round(summary(Study2.DPJ.OC.bin, 
              vcov = vcovHC(Study2.DPJ.OC.bin, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

#### robustness check: censored regression (Online Appendix E.4) ####
# function to draw initial values
initial.fun.Tobit <- function(seed1, seed2, n.var, n.id) {
  set.seed(seed1)
  alpha <- rnorm(1)
  beta <- rnorm(n.var)
  sigma <- runif(1, 1, 2)
  list(alpha = alpha, beta = beta, sigma = sigma, 
       .RNG.name = "base::Wichmann-Hill", .RNG.seed = seed2)
}
# draw initial values
Study2.DPJ.PS.Tobit.inits <- list()
for (i in 1:3) {
  Study2.DPJ.PS.Tobit.inits[[i]] <- 
    initial.fun.Tobit(100 * i, 1000 * i, 18, 
                      max(as.numeric(factor(DPJ.PS.data.Study2$name_jp))))
}
# MCMC sampling
Study2.DPJ.PS.Tobit <- run.jags(model = "Tobit_JAGS.R", 
                                monitor = c("beta", "alpha", "sigma"), 
                                data = list(y = ifelse(DPJ.PS.data.Study2$PS.char > 0, DPJ.PS.data.Study2$PS.char, NA), 
                                            z = (DPJ.PS.data.Study2$PS.char > 0) * 1, 
                                            x = scale(as.matrix(DPJ.PS.data.Study2[, c("distance.1", "distance.2", "top.leader", 
                                                                                       "chair", "minister", "jr.minister", 
                                                                                       "female", "age", "dynastic", "log.term", 
                                                                                       "vote.share", "SMD", "SMD.VS", "zombie", 
                                                                                       "E.44", "E.45", "E.46", "E.47")]), 
                                                      scale = FALSE), 
                                            N = nrow(DPJ.PS.data.Study2), 
                                            n.var = 18), 
                                inits = Study2.DPJ.PS.Tobit.inits, 
                                n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                                summarise = FALSE, thin = 1, method = "parallel")
# convergence check
print(which(gelman.diag(Study2.DPJ.PS.Tobit, multivariate = FALSE)[[1]][, 1] > 1.1))
# posterior summary
round(summary(Study2.DPJ.PS.Tobit), 3)
# posterior probability that the coefficients of left-right distance (beta[1])
# and economic distance (beta[2]) are negative
round(mean(unlist(Study2.DPJ.PS.Tobit$mcmc[, 1]) < 0), 3)
round(mean(unlist(Study2.DPJ.PS.Tobit$mcmc[, 2]) < 0), 3)

Study2.DPJ.BC.Tobit.inits <- list()
for (i in 1:3) {
  Study2.DPJ.BC.Tobit.inits[[i]] <- 
    initial.fun.Tobit(100 * i, 1000 * i, 18, 
                      max(as.numeric(factor(DPJ.BC.data.Study2$name_jp))))
}
Study2.DPJ.BC.Tobit <- run.jags(model = "Tobit_JAGS.R", 
                                monitor = c("beta", "alpha", "sigma"), 
                                data = list(y = ifelse(DPJ.BC.data.Study2$BC.char > 0, DPJ.BC.data.Study2$BC.char, NA), 
                                            z = (DPJ.BC.data.Study2$BC.char > 0) * 1, 
                                            x = scale(as.matrix(DPJ.BC.data.Study2[, c("distance.1", "distance.2", "top.leader", 
                                                                                       "chair", "minister", "jr.minister", 
                                                                                       "female", "age", "dynastic", "log.term", 
                                                                                       "vote.share", "SMD", "SMD.VS", "zombie", 
                                                                                       "E.44", "E.45", "E.46", "E.47")]), 
                                                      scale = FALSE), 
                                            N = nrow(DPJ.BC.data.Study2), 
                                            n.var = 18), 
                                inits = Study2.DPJ.BC.Tobit.inits, 
                                n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                                summarise = FALSE, thin = 1, method = "parallel")
print(which(gelman.diag(Study2.DPJ.BC.Tobit, multivariate = FALSE)[[1]][, 1] > 1.1))
round(summary(Study2.DPJ.BC.Tobit), 3)
round(mean(unlist(Study2.DPJ.BC.Tobit$mcmc[, 1]) < 0), 3)
round(mean(unlist(Study2.DPJ.BC.Tobit$mcmc[, 2]) < 0), 3)

Study2.DPJ.OC.Tobit.inits <- list()
for (i in 1:3) {
  Study2.DPJ.OC.Tobit.inits[[i]] <- 
    initial.fun.Tobit(100 * i, 1000 * i, 18, 
                      max(as.numeric(factor(DPJ.OC.data.Study2$name_jp))))
}
Study2.DPJ.OC.Tobit <- run.jags(model = "Tobit_JAGS.R", 
                                monitor = c("beta", "alpha", "sigma"), 
                                data = list(y = ifelse(DPJ.OC.data.Study2$OC.char > 0, DPJ.OC.data.Study2$OC.char, NA), 
                                            z = (DPJ.OC.data.Study2$OC.char > 0) * 1, 
                                            x = scale(as.matrix(DPJ.OC.data.Study2[, c("distance.1", "distance.2", "top.leader", 
                                                                                       "chair", "minister", "jr.minister", 
                                                                                       "female", "age", "dynastic", "log.term", 
                                                                                       "vote.share", "SMD", "SMD.VS", "zombie", 
                                                                                       "E.44", "E.45", "E.46", "E.47")]), 
                                                      scale = FALSE), 
                                            N = nrow(DPJ.OC.data.Study2), 
                                            n.var = 18), 
                                inits = Study2.DPJ.OC.Tobit.inits, 
                                n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                                summarise = FALSE, thin = 1, method = "parallel")
print(which(gelman.diag(Study2.DPJ.OC.Tobit, multivariate = FALSE)[[1]][, 1] > 1.1))
round(summary(Study2.DPJ.OC.Tobit), 3)
round(mean(unlist(Study2.DPJ.OC.Tobit$mcmc[, 1]) < 0), 3)
round(mean(unlist(Study2.DPJ.OC.Tobit$mcmc[, 2]) < 0), 3)

#### robustness check: another definition of top leaders (Online Appendix D.3) ####
Study2.PS.w.P <- plm(PS.char ~ distance.1.w.P + distance.2.w.P + 
                       top.leader.w.P + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = DPJ.PS.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.PS.w.P, 
              vcov = vcovHC(Study2.PS.w.P, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.BC.w.P <- plm(BC.char ~ distance.1.w.P + distance.2.w.P + 
                       top.leader.w.P + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = DPJ.BC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.BC.w.P, 
              vcov = vcovHC(Study2.BC.w.P, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.OC.w.P <- plm(OC.char ~ distance.1.w.P + distance.2.w.P + 
                       top.leader.w.P + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = DPJ.OC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.OC.w.P, 
              vcov = vcovHC(Study2.OC.w.P, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)